{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Exercise 6 - Answer.ipynb",
      "version": "0.3.2",
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "metadata": {
        "id": "dn-6c02VmqiN",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "import os\n",
        "import zipfile\n",
        "import random\n",
        "import tensorflow as tf\n",
        "from tensorflow.keras.optimizers import RMSprop\n",
        "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",
        "from shutil import copyfile"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "3sd9dQWa23aj",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "# If the URL doesn't work, visit https://www.microsoft.com/en-us/download/confirmation.aspx?id=54765\n",
        "# And right click on the 'Download Manually' link to get a new URL to the dataset\n",
        "\n",
        "# Note: This is a very large dataset and will take time to download\n",
        "\n",
        "!wget --no-check-certificate \\\n",
        "    \"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip\" \\\n",
        "    -O \"/tmp/cats-and-dogs.zip\"\n",
        "\n",
        "local_zip = '/tmp/cats-and-dogs.zip'\n",
        "zip_ref   = zipfile.ZipFile(local_zip, 'r')\n",
        "zip_ref.extractall('/tmp')\n",
        "zip_ref.close()\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "DM851ZmN28J3",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "print(len(os.listdir('/tmp/PetImages/Cat/')))\n",
        "print(len(os.listdir('/tmp/PetImages/Dog/')))\n",
        "\n",
        "# Expected Output:\n",
        "# 12501\n",
        "# 12501"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "F-QkLjxpmyK2",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "try:\n",
        "    os.mkdir('/tmp/cats-v-dogs')\n",
        "    os.mkdir('/tmp/cats-v-dogs/training')\n",
        "    os.mkdir('/tmp/cats-v-dogs/testing')\n",
        "    os.mkdir('/tmp/cats-v-dogs/training/cats')\n",
        "    os.mkdir('/tmp/cats-v-dogs/training/dogs')\n",
        "    os.mkdir('/tmp/cats-v-dogs/testing/cats')\n",
        "    os.mkdir('/tmp/cats-v-dogs/testing/dogs')\n",
        "except OSError:\n",
        "    pass"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "zvSODo0f9LaU",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):\n",
        "    files = []\n",
        "    for filename in os.listdir(SOURCE):\n",
        "        file = SOURCE + filename\n",
        "        if os.path.getsize(file) > 0:\n",
        "            files.append(filename)\n",
        "        else:\n",
        "            print(filename + \" is zero length, so ignoring.\")\n",
        "\n",
        "    training_length = int(len(files) * SPLIT_SIZE)\n",
        "    testing_length = int(len(files) - training_length)\n",
        "    shuffled_set = random.sample(files, len(files))\n",
        "    training_set = shuffled_set[0:training_length]\n",
        "    testing_set = shuffled_set[:testing_length]\n",
        "\n",
        "    for filename in training_set:\n",
        "        this_file = SOURCE + filename\n",
        "        destination = TRAINING + filename\n",
        "        copyfile(this_file, destination)\n",
        "\n",
        "    for filename in testing_set:\n",
        "        this_file = SOURCE + filename\n",
        "        destination = TESTING + filename\n",
        "        copyfile(this_file, destination)\n",
        "\n",
        "\n",
        "CAT_SOURCE_DIR = \"/tmp/PetImages/Cat/\"\n",
        "TRAINING_CATS_DIR = \"/tmp/cats-v-dogs/training/cats/\"\n",
        "TESTING_CATS_DIR = \"/tmp/cats-v-dogs/testing/cats/\"\n",
        "DOG_SOURCE_DIR = \"/tmp/PetImages/Dog/\"\n",
        "TRAINING_DOGS_DIR = \"/tmp/cats-v-dogs/training/dogs/\"\n",
        "TESTING_DOGS_DIR = \"/tmp/cats-v-dogs/testing/dogs/\"\n",
        "\n",
        "split_size = .9\n",
        "split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)\n",
        "split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)\n",
        "\n",
        "# Expected output\n",
        "# 666.jpg is zero length, so ignoring\n",
        "# 11702.jpg is zero length, so ignoring"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "hwHXFhVG3786",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "print(len(os.listdir('/tmp/cats-v-dogs/training/cats/')))\n",
        "print(len(os.listdir('/tmp/cats-v-dogs/training/dogs/')))\n",
        "print(len(os.listdir('/tmp/cats-v-dogs/testing/cats/')))\n",
        "print(len(os.listdir('/tmp/cats-v-dogs/testing/dogs/')))\n",
        "\n",
        "# Expected output:\n",
        "# 11250\n",
        "# 11250\n",
        "# 1250\n",
        "# 1250"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "-BQrav4anTmj",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "model = tf.keras.models.Sequential([\n",
        "    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(150, 150, 3)),\n",
        "    tf.keras.layers.MaxPooling2D(2, 2),\n",
        "    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2, 2),\n",
        "    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2, 2),\n",
        "    tf.keras.layers.Flatten(),\n",
        "    tf.keras.layers.Dense(512, activation='relu'),\n",
        "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
        "])\n",
        "\n",
        "model.compile(optimizer=RMSprop(lr=0.001), loss='binary_crossentropy', metrics=['acc'])\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "fQrZfVgz4j2g",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "\n",
        "TRAINING_DIR = \"/tmp/cats-v-dogs/training/\"\n",
        "# Experiment with your own parameters here to really try to drive it to 99.9% accuracy or better\n",
        "train_datagen = ImageDataGenerator(rescale=1./255,\n",
        "      rotation_range=40,\n",
        "      width_shift_range=0.2,\n",
        "      height_shift_range=0.2,\n",
        "      shear_range=0.2,\n",
        "      zoom_range=0.2,\n",
        "      horizontal_flip=True,\n",
        "      fill_mode='nearest')\n",
        "train_generator = train_datagen.flow_from_directory(TRAINING_DIR,\n",
        "                                                    batch_size=100,\n",
        "                                                    class_mode='binary',\n",
        "                                                    target_size=(150, 150))\n",
        "\n",
        "VALIDATION_DIR = \"/tmp/cats-v-dogs/testing/\"\n",
        "# Experiment with your own parameters here to really try to drive it to 99.9% accuracy or better\n",
        "validation_datagen = ImageDataGenerator(rescale=1./255,\n",
        "      rotation_range=40,\n",
        "      width_shift_range=0.2,\n",
        "      height_shift_range=0.2,\n",
        "      shear_range=0.2,\n",
        "      zoom_range=0.2,\n",
        "      horizontal_flip=True,\n",
        "      fill_mode='nearest')\n",
        "validation_generator = validation_datagen.flow_from_directory(VALIDATION_DIR,\n",
        "                                                              batch_size=100,\n",
        "                                                              class_mode='binary',\n",
        "                                                              target_size=(150, 150))\n",
        "\n",
        "# Expected Output:\n",
        "# Found 22498 images belonging to 2 classes.\n",
        "# Found 2500 images belonging to 2 classes."
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "5qE1G6JB4fMn",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "# Note that this may take some time.\n",
        "history = model.fit_generator(train_generator,\n",
        "                              epochs=15,\n",
        "                              verbose=1,\n",
        "                              validation_data=validation_generator)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "MWZrJN4-65RC",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "%matplotlib inline\n",
        "\n",
        "import matplotlib.image  as mpimg\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "#-----------------------------------------------------------\n",
        "# Retrieve a list of list results on training and test data\n",
        "# sets for each training epoch\n",
        "#-----------------------------------------------------------\n",
        "acc=history.history['acc']\n",
        "val_acc=history.history['val_acc']\n",
        "loss=history.history['loss']\n",
        "val_loss=history.history['val_loss']\n",
        "\n",
        "epochs=range(len(acc)) # Get number of epochs\n",
        "\n",
        "#------------------------------------------------\n",
        "# Plot training and validation accuracy per epoch\n",
        "#------------------------------------------------\n",
        "plt.plot(epochs, acc, 'r', \"Training Accuracy\")\n",
        "plt.plot(epochs, val_acc, 'b', \"Validation Accuracy\")\n",
        "plt.title('Training and validation accuracy')\n",
        "plt.figure()\n",
        "\n",
        "#------------------------------------------------\n",
        "# Plot training and validation loss per epoch\n",
        "#------------------------------------------------\n",
        "plt.plot(epochs, loss, 'r', \"Training Loss\")\n",
        "plt.plot(epochs, val_loss, 'b', \"Validation Loss\")\n",
        "plt.figure()\n",
        "\n",
        "\n",
        "# Desired output. Charts with training and validation metrics. No crash :)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "LqL6FYUrtXpf",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "# Here's a codeblock just for fun. You should be able to upload an image here \n",
        "# and have it classified without crashing\n",
        "import numpy as np\n",
        "from google.colab import files\n",
        "from keras.preprocessing import image\n",
        "\n",
        "uploaded = files.upload()\n",
        "\n",
        "for fn in uploaded.keys():\n",
        " \n",
        "  # predicting images\n",
        "  path = '/content/' + fn\n",
        "  img = image.load_img(path, target_size=(150, 150))\n",
        "  x = image.img_to_array(img)\n",
        "  x = np.expand_dims(x, axis=0)\n",
        "\n",
        "  images = np.vstack([x])\n",
        "  classes = model.predict(images, batch_size=10)\n",
        "  print(classes[0])\n",
        "  if classes[0]>0.5:\n",
        "    print(fn + \" is a dog\")\n",
        "  else:\n",
        "    print(fn + \" is a cat\")"
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}